Neste arquivo exploramos a possibilidade de transformar as séries (como com os Wavelets); porém, diferente dos Wavelets (que tendem a aumentar a dimensionalidade em troca de uma descrição mais precisa da série), vamos seguir o caminho exatamente inverso: reduzir as dimensões.
Basicamente, estamos com o seguinte pensamento:
Problemas com múltiplas variáveis são difíceis de entender; e se for possível extrair as melhores informações de uma série e ainda reduzir a dimensionalidade dos dados?
Para responder isso usamos métodos como PCA e LDA. No código abaixo usamos os últimos 2 lags e, durante a construção do modelo, usamos o PCA para reduzir a dimensionalidade do nosso problema.
Obs.: "By default, the function keeps only the PCs that are necessary to explain at least 95% of the variability in the data, but this can be changed through the argument thresh" (page 80).
In [12]:
# Load modeling (caret), SVM backend (kernlab) and ROC-curve (pROC) packages.
library(caret)
library(kernlab)
library(pROC)
# Group membership (which sequence belongs to which dataset group) and the
# movement target for every sequence.
# NOTE: the original used head=TRUE, which only worked through partial
# argument matching of read.csv's 'header' parameter; spelled out here.
groups <- read.csv(file = "./MovementAAL/groups/MovementAAL_DatasetGroup.csv",
                   header = TRUE, sep = ",")
targetAll <- read.csv(file = "./MovementAAL/dataset/MovementAAL_target.csv",
                      header = TRUE, sep = ",")
In [13]:
# Group 1: load every RSS sequence whose id belongs to dataset group 1,
# together with its movement target.
groups1 <- groups[groups$dataset_ID == 1, ]
allDataGroup1 <- vector("list", length(groups1$X.sequence_ID))       # preallocated
allDataGroup1Target <- vector("list", length(groups1$X.sequence_ID))
index <- 1
for (id in groups1$X.sequence_ID) {
  caminho <- paste0("./MovementAAL/dataset/MovementAAL_RSS_", id, ".csv")
  allDataGroup1[[index]] <- read.csv(file = caminho, header = TRUE, sep = ",")
  allDataGroup1Target[index] <- targetAll[[2]][id]
  index <- index + 1
}
# Keep only the last (minStepsBack + 1) rows of each sequence and flatten
# them into a single feature row per sequence (the "lagged" representation).
# Built with lapply + do.call instead of growing wtData via rbind in a loop.
minStepsBack <- 1
lagRows <- lapply(allDataGroup1, function(serie) {
  t(unlist(serie[(nrow(serie) - minStepsBack):nrow(serie), ]))
})
wtData <- as.data.frame(do.call(rbind, lagRows))
# Binary movement target recoded as No/Yes.
data <- unlist(allDataGroup1Target)
target <- factor(data, labels = c("No", "Yes"))
frameDataFinal <- data.frame(cbind(target, wtData))
head(frameDataFinal)
## use only lagged data
Out[13]:
In [14]:
# Group 1: 10 random 70/30 train/test splits; fit a linear SVM on the
# PCA-compressed lag features and collect the test accuracy of each split.
inTraining <- createDataPartition(frameDataFinal$target, p = .7,
                                  list = TRUE, times = 10)
allAccuracyGroup1 <- numeric(length(inTraining))        # preallocated
fitControl <- trainControl(method = "none", classProbs = TRUE)  # loop-invariant
for (i in seq_along(inTraining)) {
  training <- frameDataFinal[inTraining[[i]], ]
  testing  <- frameDataFinal[-inTraining[[i]], ]
  # preProcess = "pca" keeps the PCs explaining at least 95% of the
  # variance (configurable through preProcOptions' 'thresh').
  # NOTE: the original also passed family = binomial, a glm argument that
  # kernlab::ksvm silently ignores; dropped here.
  svmLinearFit <- train(target ~ ., data = training, preProcess = c("pca"),
                        method = "svmLinear",
                        trControl = fitControl)
  preds <- predict(svmLinearFit, newdata = testing)
  # 'cm' instead of 'matrix' so base::matrix is not shadowed.
  cm <- confusionMatrix(preds, testing$target)
  allAccuracyGroup1[i] <- cm$overall[["Accuracy"]]
}
mean(allAccuracyGroup1)
sd(allAccuracyGroup1)
Out[14]:
Out[14]:
In [15]:
# Group 2: load every RSS sequence whose id belongs to dataset group 2,
# together with its movement target.
groups2 <- groups[groups$dataset_ID == 2, ]
allDataGroup2 <- vector("list", length(groups2$X.sequence_ID))       # preallocated
allDataGroup2Target <- vector("list", length(groups2$X.sequence_ID))
index <- 1
for (id in groups2$X.sequence_ID) {
  caminho <- paste0("./MovementAAL/dataset/MovementAAL_RSS_", id, ".csv")
  allDataGroup2[[index]] <- read.csv(file = caminho, header = TRUE, sep = ",")
  allDataGroup2Target[index] <- targetAll[[2]][id]
  index <- index + 1
}
# Keep only the last (minStepsBack + 1) rows of each sequence and flatten
# them into a single feature row per sequence (the "lagged" representation).
minStepsBack <- 1
lagRows <- lapply(allDataGroup2, function(serie) {
  t(unlist(serie[(nrow(serie) - minStepsBack):nrow(serie), ]))
})
wtData <- as.data.frame(do.call(rbind, lagRows))
# Binary movement target recoded as No/Yes.
data <- unlist(allDataGroup2Target)
target <- factor(data, labels = c("No", "Yes"))
frameDataFinal <- data.frame(cbind(target, wtData))
head(frameDataFinal)
## use only lagged data
Out[15]:
In [16]:
# Group 2: 10 random 70/30 train/test splits; fit a linear SVM on the
# PCA-compressed lag features and collect the test accuracy of each split.
inTraining <- createDataPartition(frameDataFinal$target, p = .7,
                                  list = TRUE, times = 10)
allAccuracyGroup2 <- numeric(length(inTraining))        # preallocated
# By default, preProcess = "pca" keeps only the PCs that are necessary to
# explain at least 95% of the variability in the data, but this can be
# changed through the argument thresh.
fitControl <- trainControl(method = "none", classProbs = TRUE)  # loop-invariant
for (i in seq_along(inTraining)) {
  training <- frameDataFinal[inTraining[[i]], ]
  testing  <- frameDataFinal[-inTraining[[i]], ]
  # NOTE: the original also passed family = binomial, a glm argument that
  # kernlab::ksvm silently ignores; dropped here.
  svmLinearFit <- train(target ~ ., data = training, preProcess = c("pca"),
                        method = "svmLinear",
                        trControl = fitControl)
  preds <- predict(svmLinearFit, newdata = testing)
  # 'cm' instead of 'matrix' so base::matrix is not shadowed.
  cm <- confusionMatrix(preds, testing$target)
  allAccuracyGroup2[i] <- cm$overall[["Accuracy"]]
}
mean(allAccuracyGroup2)
sd(allAccuracyGroup2)
Out[16]:
Out[16]:
In [17]:
# Group 3: load every RSS sequence whose id belongs to dataset group 3,
# together with its movement target.
groups3 <- groups[groups$dataset_ID == 3, ]
allDataGroup3 <- vector("list", length(groups3$X.sequence_ID))       # preallocated
allDataGroup3Target <- vector("list", length(groups3$X.sequence_ID))
index <- 1
# BUG FIX: the original iterated over groups2$X.sequence_ID (copy-paste
# error), so "Group 3" was actually built from group 2's sequences.
for (id in groups3$X.sequence_ID) {
  caminho <- paste0("./MovementAAL/dataset/MovementAAL_RSS_", id, ".csv")
  allDataGroup3[[index]] <- read.csv(file = caminho, header = TRUE, sep = ",")
  allDataGroup3Target[index] <- targetAll[[2]][id]
  index <- index + 1
}
# Keep only the last (minStepsBack + 1) rows of each sequence and flatten
# them into a single feature row per sequence (the "lagged" representation).
minStepsBack <- 1
lagRows <- lapply(allDataGroup3, function(serie) {
  t(unlist(serie[(nrow(serie) - minStepsBack):nrow(serie), ]))
})
wtData <- as.data.frame(do.call(rbind, lagRows))
# Binary movement target recoded as No/Yes.
data <- unlist(allDataGroup3Target)
target <- factor(data, labels = c("No", "Yes"))
frameDataFinal <- data.frame(cbind(target, wtData))
head(frameDataFinal)
## use only lagged data
Out[17]:
In [18]:
# Group 3: 10 random 70/30 train/test splits; fit a linear SVM on the
# PCA-compressed lag features and collect the test accuracy of each split.
inTraining <- createDataPartition(frameDataFinal$target, p = .7,
                                  list = TRUE, times = 10)
allAccuracyGroup3 <- numeric(length(inTraining))        # preallocated
# By default, preProcess = "pca" keeps only the PCs that are necessary to
# explain at least 95% of the variability in the data, but this can be
# changed through the argument thresh.
fitControl <- trainControl(method = "none", classProbs = TRUE)  # loop-invariant
for (i in seq_along(inTraining)) {
  training <- frameDataFinal[inTraining[[i]], ]
  testing  <- frameDataFinal[-inTraining[[i]], ]
  # NOTE: the original also passed family = binomial, a glm argument that
  # kernlab::ksvm silently ignores; dropped here.
  svmLinearFit <- train(target ~ ., data = training, preProcess = c("pca"),
                        method = "svmLinear",
                        trControl = fitControl)
  preds <- predict(svmLinearFit, newdata = testing)
  # 'cm' instead of 'matrix' so base::matrix is not shadowed.
  cm <- confusionMatrix(preds, testing$target)
  allAccuracyGroup3[i] <- cm$overall[["Accuracy"]]
}
mean(allAccuracyGroup3)
sd(allAccuracyGroup3)
Out[18]:
Out[18]:
In [19]:
# All groups: load every RSS sequence in the dataset together with its
# movement target.
targetAll <- read.csv(file = "./MovementAAL/dataset/MovementAAL_target.csv",
                      header = TRUE, sep = ",")
allData <- vector("list", length(targetAll$X.sequence_ID))       # preallocated
allDataTarget <- vector("list", length(targetAll$X.sequence_ID))
index <- 1
for (id in targetAll$X.sequence_ID) {
  caminho <- paste0("./MovementAAL/dataset/MovementAAL_RSS_", id, ".csv")
  allData[[index]] <- read.csv(file = caminho, header = TRUE, sep = ",")
  allDataTarget[index] <- targetAll[[2]][id]
  index <- index + 1
}
# Keep only the last (minStepsBack + 1) rows of each sequence and flatten
# them into a single feature row per sequence (the "lagged" representation).
minStepsBack <- 1
lagRows <- lapply(allData, function(serie) {
  t(unlist(serie[(nrow(serie) - minStepsBack):nrow(serie), ]))
})
wtData <- as.data.frame(do.call(rbind, lagRows))
# Binary movement target recoded as No/Yes.
data <- unlist(allDataTarget)
target <- factor(data, labels = c("No", "Yes"))
frameDataFinal <- data.frame(cbind(target, wtData))
head(frameDataFinal)
Out[19]:
In [20]:
# All groups: 10 random 70/30 train/test splits; fit a linear SVM on the
# PCA-compressed lag features and collect the test accuracy of each split.
inTraining <- createDataPartition(frameDataFinal$target, p = .7,
                                  list = TRUE, times = 10)
allAccuracy <- numeric(length(inTraining))              # preallocated
fitControl <- trainControl(method = "none", classProbs = TRUE)  # loop-invariant
for (i in seq_along(inTraining)) {
  training <- frameDataFinal[inTraining[[i]], ]
  testing  <- frameDataFinal[-inTraining[[i]], ]
  # NOTE: the original also passed family = binomial, a glm argument that
  # kernlab::ksvm silently ignores; dropped here.
  svmLinearFit <- train(target ~ ., data = training, preProcess = c("pca"),
                        method = "svmLinear",
                        trControl = fitControl)
  preds <- predict(svmLinearFit, newdata = testing)
  # 'cm' instead of 'matrix' so base::matrix is not shadowed.
  cm <- confusionMatrix(preds, testing$target)
  allAccuracy[i] <- cm$overall[["Accuracy"]]
}
mean(allAccuracy)
sd(allAccuracy)
Out[20]:
Out[20]:
In [21]:
# All groups: a single 70/30 split and the full confusion matrix on the
# held-out test set. svmLinearFit/testing/inTraining are reused by the
# ROC cell below, so their names are kept.
inTraining <- createDataPartition(frameDataFinal$target, p = .7,
                                  list = TRUE, times = 1)
training <- frameDataFinal[inTraining[[1]], ]
testing  <- frameDataFinal[-inTraining[[1]], ]
fitControl <- trainControl(method = "none", classProbs = TRUE)
# NOTE: the original also passed family = binomial, a glm argument that
# kernlab::ksvm silently ignores; dropped here.
svmLinearFit <- train(target ~ ., data = training, preProcess = c("pca"),
                      method = "svmLinear",
                      trControl = fitControl)
preds <- predict(svmLinearFit, newdata = testing)
# 'confMat' instead of 'matrix' so base::matrix is not shadowed.
confMat <- confusionMatrix(preds, testing$target)
confMat
Out[21]:
In [22]:
# ROC curve and AUC for the single all-groups split fitted above.
predsProb <- predict(svmLinearFit, newdata = testing, type = "prob")
# Select the positive-class probability column by name rather than by
# position, so the result cannot silently break if caret changes the
# column order of the probability data frame.
outcome <- predsProb[, "Yes"]
classes <- frameDataFinal$target[-inTraining[[1]]]
# levels = c(control, case): "No" is the control level, "Yes" the case.
rocobj <- roc(classes, outcome, levels = c("No", "Yes"))
plot(rocobj)
Out[22]: